import logging
import os.path
import re
import shutil

import requests



# Directory that receives the downloaded images.
target_file = "Img"

# Scheme prepended to the protocol-relative ("//host/path") image URLs.
base_url = "https:"

# Start every run from an empty output directory: drop whatever a previous
# run left behind, then create the directory again.
if os.path.exists(target_file):
    shutil.rmtree(target_file)
os.makedirs(target_file)


from utils.loggerutils import logger_config

# Run the project's logging setup before the first record is emitted.
# NOTE(review): presumably installs handlers/formatters — confirm in
# utils.loggerutils.
logger_config()

# Use the module's __name__ variable, not the literal string '__name__', so
# the logger is named after this module (e.g. "__main__" when run as a script).
logger = logging.getLogger(__name__)

# URL of the listing page to scrape.
url = "https://www.vcg.com/creative-image/tengxun/?page=2"

# Send a desktop-browser User-Agent instead of the default requests one.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
}

# requests waits indefinitely by default, so an explicit timeout keeps the
# script from hanging on a stalled connection. raise_for_status() stops early
# on an HTTP error instead of silently parsing an error page into zero results.
response = requests.get(url, headers=header, timeout=10)
response.raise_for_status()

"""
<a class="imgWaper" target="_blank" rel="opener" 
href="https://www.vcg.com/creative/1527141855.html" 
title="深圳前海大铲湾腾讯企鹅岛建设中图片"><img class="lazyload_hk ll_loaded" data-src="//vcg02.cfp.cn/creative/vcg/nowater800/new/VCG211527141855.jpg" 
data-min="//vcg01.cfp.cn/creative/vcg/400/new/VCG211527141855.jpg" src="//vcg02.cfp.cn/creative/vcg/nowater800/new/VCG211527141855.jpg?x-oss-process=image/format,webp"
 title="深圳前海大铲湾腾讯企鹅岛建设中图片购买" alt="深圳前海大铲湾腾讯企鹅岛建设中图片下载" draggable="true"><div class="mask"></div></a>
"""

# Extract one (title, src) tuple per result card. A card is an
# <a class="imgWaper"> anchor wrapping an <img> and a <div class="mask">.
#   group 1: the anchor's title attribute
#   group 2: the first attribute value inside <img> whose name ends in "src"
#            (in the sample markup kept in the string above that is data-src,
#            a protocol-relative URL)
# The earlier pattern used `title="(.*?).*?"`: a lazy group followed directly
# by another lazy wildcard, so group 1 always matched the empty string and
# every image was saved under the same empty file name. Closing the quote
# right after the group makes it capture the whole title. The fixed
# `target="..."` requirement is dropped so extra or reordered anchor
# attributes do not break the match.
items = re.findall(
    r'<a class="imgWaper".*?title="(.*?)".*?><img.*?src="(.*?)".*?><div class="mask"></div></a>',
    response.text,
    re.DOTALL,
)

print(len(items))

# Download every matched image into the output directory. Each item is a
# (title, src) tuple produced by the re.findall call that builds `items`.
for index, item in enumerate(items, start=1):
    title, src = item
    # The captured src is protocol-relative ("//host/path"): prefix the scheme.
    img_url = base_url + src
    logger.info(f"正在爬取中......图片标题是：{title}，封面图片是：{img_url}")

    try:
        # Reuse the browser-like headers and bound the wait, so one stalled
        # or rejected image cannot hang or abort the whole run.
        img = requests.get(img_url, headers=header, timeout=10)
        img.raise_for_status()
    except requests.RequestException:
        # Skip this image instead of writing an error body to a .jpg file.
        logger.exception(f"图片下载失败，已跳过：{img_url}")
        continue

    # Titles come from page markup and may be empty or contain characters
    # that are invalid in file names (path separators, quotes, ...).
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", title).strip(" .")
    if not safe_title:
        safe_title = f"image_{index}"
    file_path = os.path.join(target_file, f"{safe_title}.jpg")
    # Two cards can share a title; never overwrite an image saved earlier.
    if os.path.exists(file_path):
        file_path = os.path.join(target_file, f"{safe_title}_{index}.jpg")

    with open(file_path, "wb") as f:
        f.write(img.content)
    logger.info("正在爬取中......图片保存成功")
    print(item)